import requests
from lxml import etree
import pymysql
"""
二级页面嵌套爬取
    爬取一级： 列表栏的url
    进去二级页面 ：情话内容

"""


class QingHua():
    """Fetch a quote page, extract its quote texts and store them in MySQL.

    The instance owns one pymysql connection (``self.db``) and one cursor
    (``self.cursor``); call :meth:`close` when finished to release them.
    """

    def __init__(self, user='root', password='lhdtyy1021', database='', charset='utf8'):
        """Open the MySQL connection and prepare the HTTP request headers.

        Args:
            user: MySQL user name.
            password: MySQL password.
            database: Name of the database to select on connect.
            charset: Connection character set.

        NOTE(review): the defaults reproduce the previously hard-coded
        credentials so existing ``QingHua()`` callers keep working; prefer
        passing real credentials in from configuration instead.
        """
        # Browser-like User-Agent sent with every page request.
        self.headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
        }
        # Connect to the database.
        self.db = pymysql.connect(user=user, password=password, database=database, charset=charset)
        self.cursor = self.db.cursor()  # cursor used for all statements

    def close(self):
        """Close the cursor and the database connection."""
        self.cursor.close()
        self.db.close()

    def get_data(self, url_m, timeout=10):
        """Download ``url_m`` and return the response body as text.

        Args:
            url_m: URL of the page to fetch.
            timeout: Seconds to wait for the server before giving up, so a
                stalled host cannot hang the crawl forever.

        Returns:
            The decoded response text when the status code is 200,
            otherwise ``None``.

        Raises:
            requests.RequestException: on network errors or timeout.
        """
        response = requests.get(url_m, headers=self.headers, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None

    def parese_data(self, data):
        """Extract the quote texts from a detail page's HTML.

        Args:
            data: HTML text of the page, or ``None``/empty when the fetch
                failed.

        Returns:
            A list with the text of every ``<p>`` found under
            ``div.stbody > a`` (class exactly ``"stbody "`` or
            ``"stbody first"``); an empty list when ``data`` is empty.
        """
        # get_data() returns None on a non-200 response; there is nothing
        # to parse in that case.
        if not data:
            return []
        data_xm = etree.HTML(data)
        data_saohua = data_xm.xpath('//div[@class="stbody "]/a/p/text() | //div[@class="stbody first"]/a/p/text()')
        print(data_saohua)
        return data_saohua

    def save_data(self, data_saohua):
        """Insert the extracted quotes into table ``XC``, one row per quote.

        Args:
            data_saohua: A list of quote strings (as returned by
                :meth:`parese_data`) or a single string.

        The insert is committed as one transaction and rolled back if any
        row fails. Nothing is executed when there is nothing to store.
        """
        # A lone string is treated as a single quote, not as characters.
        if isinstance(data_saohua, str):
            data_saohua = [data_saohua]
        if not data_saohua:
            return

        sql = 'insert into XC(text) values(%s)'  # parameterized insert
        # Binding the whole list to one placeholder makes the driver expand
        # it into a tuple literal, which is invalid for a single column;
        # bind each quote as its own row instead.
        rows = [(text,) for text in data_saohua]
        try:
            self.cursor.executemany(sql, rows)
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

if __name__ == '__main__':
    # Script entry point: read the list page, follow every item link and
    # store the quotes found on each detail page.
    from urllib.parse import urljoin  # local import: only the script needs it

    # Level-1 list page whose items link to the detail pages.
    url_zhu = 'http://www.ainicr.cn/qh/t83.html'

    # One scraper, and therefore one database connection, for the whole
    # run instead of a new connection per detail page.
    scraper = QingHua()
    try:
        # Fetch the list page through the scraper so it carries the same
        # User-Agent header as the detail-page requests.
        index_html = scraper.get_data(url_zhu)
        hrefs = []
        if index_html:
            hrefs = etree.HTML(index_html).xpath('//div [@class="item"]//h4/a/@href')

        for href in hrefs:
            # Resolve relative links against the list page; absolute URLs
            # pass through unchanged.
            url_m = urljoin(url_zhu, href)
            data = scraper.get_data(url_m)
            if not data:
                # Non-200 response: skip this page rather than abort.
                continue
            data_saohua = scraper.parese_data(data)
            if data_saohua:
                scraper.save_data(data_saohua)
    finally:
        # Always release the cursor and connection, even after an error.
        scraper.cursor.close()
        scraper.db.close()













